pacman::p_load(olsrr, corrplot, ggpubr, sf, sfdep, spdep, GWmodel, tmap, tidyverse, gtsummary)Take-home Exercise 3
Modelling Geography of Financial Inclusion in Tanzania
1 Objectives
The objective of this analysis is to build an explanatory model that identifies the factors affecting financial inclusion, using geographically weighted regression (GWR) methods.
Financial inclusion is the availability and equality of opportunities to access financial services, which include banking, loan, equity and insurance products.
2 Getting Started
2.1 Install and Load R Packages
2.2 Importing Geospatial Data
district_tz = st_read(dsn = "data/rawdata/geospatial", layer = "geoBoundaries-TZA-ADM2")Reading layer `geoBoundaries-TZA-ADM2' from data source
`C:\brigittatsai\ISSS626_AY2024-25_T1\Take-home_Ex\Take-home_Ex03\data\rawdata\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 170 features and 5 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 29.58953 ymin: -11.76235 xmax: 40.44473 ymax: -0.983143
Geodetic CRS: WGS 84
2.3 Importing Aspatial Data
tanzania = read_csv("data/rawdata/aspatial/tanzania.csv")Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 9915 Columns: 721
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (703): reg_name, dist_name, ward_code1, ward_name, ea_code, clustertype,...
dbl (13): SN, reg_code, dist_code, c8c, D6_1_1, D6_1_2, D6_1_3, gov_3, cmg4...
lgl (5): e_5_1, e_5_2, g_5_2__5, g_5_2__13, serv2_4
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
3 Data Wrangling
3.1 Aspatial Data Wrangling
3.1.1 Check Districts in Both Dataset
unique(tanzania$dist_name) [1] "Misungwi" "Missenyi" "Kyela" "Kongwa" "Ilala"
[6] "Iramba" "Mbogwe" "Handeni" "Chato" "Sengerema"
[11] "Ikungi" "Lindi" "Kilindi" "Namtumbo" "Wanging'ombe"
[16] "Mpwapwa" "Kilosa" "Mkalama" "Singida" "Tanganyika"
[21] "Mlele" "Nachingwea" "Mbozi" "Nkasi" "Bahi"
[26] "Iringa" "Chamwino" "Momba" "Ileje" "Sumbawanga"
[31] "Simanjiro" "Babati" "Kalambo" "Songwe" "Kilwa"
[36] "Geita" "Kilombero" "Temeke" "Kigamboni" "Muheza"
[41] "Mpanda" "Nyamagana" "Mtwara" "Ilemela" "Nanyumbu"
[46] "Bagamoyo" "Tarime" "Bukombe" "Mbeya" "Arumeru"
[51] "Ukerewe" "Masasi" "Chunya" "Butiama" "Karagwe"
[56] "Mbinga" "Dodoma" "Mkuranga" "Songea" "Tanga"
[61] "Rufiji" "Kondoa" "Buhigwe" "Mbarali" "Lushoto"
[66] "Kisarawe" "Ruangwa" "Musoma" "Kwimba" "Mkinga"
[71] "Uyui" "Kigoma" "Malinyi" "Nyasa" "Tandahimba"
[76] "Nyang'hwale" "Kaliua" "Igunga" "Hai" "Kiteto"
[81] "Newala" "Magu" "Kibondo" "Tunduru" "Nzega"
[86] "Monduli" "Rorya" "Liwale" "Bukoba" "Biharamulo"
[91] "Sikonge" "Kibiti" "Mbulu" "Bariadi" "Kasulu"
[96] "Moshi" "Kakonko" "Njombe" "Uvinza" "Chemba"
[101] "Rungwe" "Mufindi" "Mkoani" "Wete" "Micheweni"
[106] "Chake Chake" "Kati" "Kaskazini B" "Mjini" "Kusini"
[111] "Kaskazini A" "Kinondoni" "Kibaha" "Arusha" "Mwanga"
[116] "Magharibi B" "Magharibi A" "Serengeti" "Ludewa" "Kahama"
[121] "Hanang" "Gairo" "Morogoro" "Karatu" "Maswa"
[126] "Manyoni" "Meatu" "Kishapu" "Rombo" "Kilolo"
[131] "Pangani" "Urambo" "Busega" "Ulanga" "Shinyanga"
[136] "Makete" "Longido" "Itilima" "Ubungo" "Tabora"
[141] "Mvomero" "Same" "Bunda" "Kyerwa" "Siha"
[146] "Ngara" "Muleba" "Ngorongoro"
unique(district_tz$shapeName) [1] "Arusha" "Arusha Urban"
[3] "Karatu" "Longido"
[5] "Meru" "Monduli"
[7] "Ngorongoro" "Ilala"
[9] "Kinondoni" "Temeke"
[11] "Bahi" "Chamwino"
[13] "Chemba" "Dodoma Urban"
[15] "Kondoa" "Kongwa"
[17] "Mpwapwa" "Bukombe"
[19] "Chato" "Geita"
[21] "Mbogwe" "Nyang'hwale"
[23] "Iringa" "Iringa Urban"
[25] "Kilolo" "Mafinga Township Authority"
[27] "Mufindi" "Biharamulo"
[29] "Bukoba" "Bukoba Urban"
[31] "Karagwe" "Kyerwa"
[33] "Missenyi" "Muleba"
[35] "Ngara" "Micheweni"
[37] "Wete" "Kaskazini A"
[39] "Kaskazini B" "Mlele"
[41] "Mpanda" "Mpanda Urban"
[43] "Buhigwe" "Kakonko"
[45] "Kasulu" "Kasulu Township Authority"
[47] "Kibondo" "Kigoma"
[49] "Kigoma Urban" "Uvinza"
[51] "Hai" "Moshi"
[53] "Moshi Urban" "Mwanga"
[55] "Rombo" "Same"
[57] "Siha" "Chake Chake"
[59] "Mkoani" "Kati"
[61] "Kusini" "Kilwa"
[63] "Lindi" "Lindi Urban"
[65] "Liwale" "Nachingwea"
[67] "Ruangwa" "Babati"
[69] "Babati UrbanBabati Urban" "Hanang"
[71] "Kiteto" "Mbulu"
[73] "Simanjiro" "Bunda"
[75] "Butiam" "Musoma"
[77] "Musoma Urban" "Rorya"
[79] "Serengeti" "Tarime"
[81] "Chunya" "Kyela"
[83] "Mbarali" "Mbeya"
[85] "Mbeya Urban" "Rungwe"
[87] "Magharibi" "Mjini"
[89] "Gairo" "Kilombero"
[91] "Kilosa" "Morogoro"
[93] "Morogoro Urban" "Mvomero"
[95] "Ulanga" "Masasi"
[97] "Masasi Township Authority" "Mtwara"
[99] "Mtwara Urban" "Nanyumbu"
[101] "Newala" "Tandahimba"
[103] "Ilemela" "Kwimba"
[105] "Magu" "Misungwi"
[107] "Nyamagana" "Sengerema"
[109] "Ukerewe" "Ludewa"
[111] "Makambako Township Authority" "Makete"
[113] "Njombe" "Njombe Urban"
[115] "Wanging'ombe" "Bagamoyo"
[117] "Kibaha" "Kibaha Urban"
[119] "Kisarawe" "Mafia"
[121] "Mkuranga" "Rufiji"
[123] "Kalambo" "Nkasi"
[125] "Sumbawanga" "Sumbawanga Urban"
[127] "Mbinga" "Namtumbo"
[129] "Nyasa" "Songea"
[131] "Songea Urban" "Tunduru"
[133] "Kahama" "Kahama Township Authority"
[135] "Kishapu" "Shinyanga"
[137] "Shinyanga Urban" "Bariadi"
[139] "Busega" "Itilima"
[141] "Maswa" "Meatu"
[143] "Ikungi" "Iramba"
[145] "Manyoni" "Mkalama"
[147] "Singida" "Singida Urban"
[149] "Ileje" "Mbozi"
[151] "Momba" "Songwe"
[153] "Tunduma" "Igunga"
[155] "Kaliua" "Nzega"
[157] "Sikonge" "Tabora Urban"
[159] "Urambo" "Uyui"
[161] "Handeni" "Handeni Mji"
[163] "Kilindi" "Korogwe"
[165] "Korogwe Township Authority" "Lushoto"
[167] "Mkinga" "Muheza"
[169] "Pangani" "Tanga Urban"
length(intersect(tanzania$dist_name, district_tz$shapeName))[1] 136
Check if there is any duplicated survey respondent
tanzania$SN[duplicated(tanzania$SN)]numeric(0)
There are no duplicates in the serial number (SN) column.
3.1.2 Selecting Variables
The current Tanzania dataset consists of 721 columns. Let us select the variables that we are interested in for our analysis.
# Keep only the 35 survey columns that are used in the rest of the analysis.
tz <- select(
  tanzania,
  dist_name, clustertype,
  c8c, c9, c11, e_5_2,
  c23__1, c23__2, c23__3, BANKED, ins2,
  mob2, c27__9, c27__10, c27__14,
  D6_4a, e_3_3__4, e_3_3__5, e_3_3__6, e_3_3__7,
  e_3_3__11, e_3_3__12, e_3_3__13, e_4,
  f_3_2_1__1, f_3_2_1__2, f_3_2_1__3, f_3_2_1__4,
  g_1_2_1__1, g_1_2_1__2, g_1_2_1__3, g_1_2_1__9, g_1_2_1__5,
  Household_weight, population_wt
)
3.1.3 Rename Columns
# Replace the questionnaire codes with readable column names
# (new_name = old_name).
tz <- rename(
  tz,
  # respondent profile
  age            = c8c,
  gender         = c9,
  edu            = c11,
  fin_edu        = e_5_2,
  # access and usage
  mm_access      = c23__1,
  int_access     = c23__2,
  laptop_access  = c23__3,
  bank           = BANKED,
  insurance      = ins2,
  mm             = mob2,
  # documents
  pb_doc         = c27__9,
  bank_doc       = c27__10,
  insurance_doc  = c27__14,
  biz_registered = D6_4a,
  # advice
  bank_advice    = e_3_3__4,
  mfi_advice     = e_3_3__5,
  sacco_advice   = e_3_3__6,
  fin_advice     = e_3_3__7,
  sg_advice      = e_3_3__11,
  ml_advice      = e_3_3__12,
  gov_advice     = e_3_3__13,
  budget         = e_4,
  # saving
  bank_save      = f_3_2_1__1,
  mfi_save       = f_3_2_1__2,
  sacco_save     = f_3_2_1__3,
  mm_save        = f_3_2_1__4,
  # borrowing
  bank_borrow    = g_1_2_1__1,
  mfi_borrow     = g_1_2_1__2,
  sacco_borrow   = g_1_2_1__3,
  gov_borrow     = g_1_2_1__9,
  mm_borrow      = g_1_2_1__5,
  # survey weights
  hh_wt          = Household_weight,
  pop_wt         = population_wt
)
3.1.4 Check Missing Value
sum(is.na(tz$biz_registered))[1] 5037
The missing values belong to respondents who do not own a business, so let us fill them in with the label "no biz".
# Label every missing business-registration answer as "no biz".
tz$biz_registered <- replace(
  tz$biz_registered,
  is.na(tz$biz_registered),
  "no biz"
)
Remove rows with missing values
# Drop respondents with a missing answer in any of these three columns.
tz <- tz %>%
  filter(
    !is.na(bank_borrow),
    !is.na(int_access),
    !is.na(laptop_access)
  )

# Percentage of TRUE values in a logical vector.
pct_true <- function(flag) {
  mean(flag) * 100
}

# Aggregate the respondents to one row per district: the percentage of
# respondents giving each listed answer, plus the mean survey weights.
tz_gwr <- tz %>%
  group_by(dist_name) %>%
  summarize(
    urban_pct            = pct_true(clustertype == "Urban"),
    rural_pct            = pct_true(clustertype == "Rural"),
    female_pct           = pct_true(gender == "Female"),
    male_pct             = pct_true(gender == "Male"),
    no_edu_pct           = pct_true(edu == "No formal education"),
    no_fin_edu_pct       = pct_true(fin_edu == "FALSE"),
    no_mm_access_pct     = pct_true(mm_access == "No"),
    no_int_access_pct    = pct_true(int_access == "No"),
    no_laptop_access_pct = pct_true(laptop_access == "No"),
    no_bank_pct          = pct_true(bank == "Not Banked"),
    no_insurance_pct     = pct_true(insurance == "No"),
    no_mm_pct            = pct_true(mm == "No"),
    no_pb_doc_pct        = pct_true(pb_doc == "No"),
    no_bank_doc_pct      = pct_true(bank_doc == "No"),
    no_insurance_doc_pct = pct_true(insurance_doc == "No"),
    no_biz_register_pct  = pct_true(biz_registered == "No"),
    no_bank_advice_pct   = pct_true(bank_advice == "No"),
    no_mfi_advice_pct    = pct_true(mfi_advice == "No"),
    no_sacco_advice_pct  = pct_true(sacco_advice == "No"),
    no_fin_advice_pct    = pct_true(fin_advice == "No"),
    no_sg_advice_pct     = pct_true(sg_advice == "No"),
    no_ml_advice_pct     = pct_true(ml_advice == "No"),
    no_gov_advice_pct    = pct_true(gov_advice == "No"),
    no_budget_pct        = pct_true(budget == "No, I don’t agree"),
    no_bank_save_pct     = pct_true(bank_save == "No"),
    no_mfi_save_pct      = pct_true(mfi_save == "No"),
    no_sacco_save_pct    = pct_true(sacco_save == "No"),
    no_mm_save_pct       = pct_true(mm_save == "No"),
    no_bank_borrow_pct   = pct_true(bank_borrow == "No"),
    no_mfi_borrow_pct    = pct_true(mfi_borrow == "No"),
    no_sacco_borrow_pct  = pct_true(sacco_borrow == "No"),
    no_gov_borrow_pct    = pct_true(gov_borrow == "No"),
    no_mm_borrow_pct     = pct_true(mm_borrow == "No"),
    avg_hh_wt            = mean(hh_wt, na.rm = TRUE),
    avg_pop_wt           = mean(pop_wt, na.rm = TRUE)
  )
3.2 Geospatial Data Wrangling
3.2.1 Updating CRS Information
st_crs(district_tz)Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
# Re-express the district boundaries in EPSG:4210 and print the resulting CRS.
# NOTE(review): the CRS printed below is a GEOGCRS ("Arc 1960") with degree
# angle units, i.e. still a geographic system. The GWR calls later in this
# document use longlat = FALSE and report a Euclidean distance metric, so their
# bandwidth appears to be measured in degrees. Consider a projected CRS (for
# example an Arc 1960 / UTM zone) or longlat = TRUE -- TODO confirm and re-run.
district_tz <- st_transform(district_tz, 4210)st_crs(district_tz)Coordinate Reference System:
User input: EPSG:4210
wkt:
GEOGCRS["Arc 1960",
DATUM["Arc 1960",
ELLIPSOID["Clarke 1880 (RGS)",6378249.145,293.465,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Geodesy."],
AREA["Burundi, Kenya, Rwanda, Tanzania and Uganda."],
BBOX[-11.75,28.85,4.63,41.91]],
ID["EPSG",4210]]
3.2.2 Convert Multipolygon into Individual Polygons
Calculate the area of each polygon
# Cast the district layer to POLYGON geometries and store the area of each
# resulting polygon in a new `area` column. The attributes are repeated for
# every sub-geometry, which is what the warning printed below reports.
sf_polygon <- district_tz %>%
st_cast("POLYGON") %>%
mutate(area = st_area(.))Warning in st_cast.sf(., "POLYGON"): repeating attributes for all
sub-geometries for which they may not be constant
3.2.3 Group by the unique name and select largest polygon by area
# For every district name keep only its largest polygon, so that each district
# is represented by a single geometry. Only shapeName is selected; the sf
# geometry column stays attached, so there is no need to drop `area` first.
dist_tz <- sf_polygon %>%
  group_by(shapeName) %>%
  filter(area == max(area)) %>%
  ungroup() %>%
  select(shapeName)
3.3 Join Table
3.3.1 Perform Left Join
# Attach each district's boundary to the aggregated survey table by matching
# the survey district name against the boundary shape name.
tz_gwr <- left_join(
  tz_gwr,
  dist_tz,
  by = c("dist_name" = "shapeName")
)
3.3.2 Check for Empty Polygons
sum(st_is_empty(tz_gwr$geometry))[1] 12
There are 12 empty geometries, i.e. 12 survey districts whose names have no match in the boundary layer. Let us drop these districts.
# Keep only the districts that received a non-empty geometry from the join.
tz_gwr <- filter(tz_gwr, !st_is_empty(geometry))
3.3.3 Convert aspatial dataframe into sf object
tz_sf <- st_as_sf(tz_gwr)st_crs(tz_sf)Coordinate Reference System:
User input: EPSG:4210
wkt:
GEOGCRS["Arc 1960",
DATUM["Arc 1960",
ELLIPSOID["Clarke 1880 (RGS)",6378249.145,293.465,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Geodesy."],
AREA["Burundi, Kenya, Rwanda, Tanzania and Uganda."],
BBOX[-11.75,28.85,4.63,41.91]],
ID["EPSG",4210]]
3.3.4 Drop Districts
Drop the districts that are not present in both datasets.
# Restrict the boundary layer to the districts that remain in tz_gwr.
dist_tz <- filter(dist_tz, shapeName %in% tz_gwr$dist_name)
4 Exploratory Data Analysis
4.1 EDA using statistical graphs
Let us check whether the districts in Tanzania are mostly urban or rural.
# Distribution of the rural percentage across districts (20 bins).
ggplot(tz_gwr, aes(x = rural_pct)) +
  geom_histogram(fill = "light blue", color = "black", bins = 20)
We can see that the distribution is skewed to the left, which means that the majority of the districts are predominantly rural.
Let us roughly find out the percentage of people in Tanzania who have not received any financial education in their lives.
# Distribution of the no-financial-education percentage across districts.
ggplot(tz_gwr, aes(x = no_fin_edu_pct)) +
  geom_histogram(fill = "light blue", color = "black", bins = 20)
It turns out that this distribution is also left-skewed, which means that a large share of people still lack financial education.
Let us plot all the EDAs to figure out the level of accessibility to financial services in Tanzania
# Build a 20-bin histogram of one column of tz_gwr, given the column name as a
# string.
pct_histogram <- function(var) {
  ggplot(data = tz_gwr, aes(x = .data[[var]])) +
    geom_histogram(bins = 20, color = "black", fill = "light blue")
}

NO_MM_ACCESS_PCT     <- pct_histogram("no_mm_access_pct")
NO_INT_ACCESS_PCT    <- pct_histogram("no_int_access_pct")
NO_LAPTOP_ACCESS_PCT <- pct_histogram("no_laptop_access_pct")
NO_BANK_PCT          <- pct_histogram("no_bank_pct")
NO_INSURANCE_PCT     <- pct_histogram("no_insurance_pct")
NO_MM_PCT            <- pct_histogram("no_mm_pct")
NO_PB_DOC_PCT        <- pct_histogram("no_pb_doc_pct")
NO_BANK_DOC_PCT      <- pct_histogram("no_bank_doc_pct")
NO_INSURANCE_DOC_PCT <- pct_histogram("no_insurance_doc_pct")
NO_BIZ_REGISTER_PCT  <- pct_histogram("no_biz_register_pct")
NO_BANK_ADVICE_PCT   <- pct_histogram("no_bank_advice_pct")
NO_MFI_ADVICE_PCT    <- pct_histogram("no_mfi_advice_pct")

# Lay the twelve histograms out on a 3-column by 4-row grid.
ggarrange(
  NO_MM_ACCESS_PCT, NO_INT_ACCESS_PCT, NO_LAPTOP_ACCESS_PCT,
  NO_BANK_PCT, NO_INSURANCE_PCT, NO_MM_PCT,
  NO_PB_DOC_PCT, NO_BANK_DOC_PCT, NO_INSURANCE_DOC_PCT,
  NO_BIZ_REGISTER_PCT, NO_BANK_ADVICE_PCT, NO_MFI_ADVICE_PCT,
  ncol = 3, nrow = 4
)
As you can see from the graphs, people rarely have access to banks or microfinancial institutions or other financial institutions to gain financial advice.
# Build a 20-bin histogram of one column of tz_gwr, given the column name as a
# string.
pct_histogram <- function(var) {
  ggplot(data = tz_gwr, aes(x = .data[[var]])) +
    geom_histogram(bins = 20, color = "black", fill = "light blue")
}

NO_SACCO_ADVICE_PCT <- pct_histogram("no_sacco_advice_pct")
NO_FIN_ADVICE_PCT   <- pct_histogram("no_fin_advice_pct")
NO_SG_ADVICE_PCT    <- pct_histogram("no_sg_advice_pct")
NO_ML_ADVICE_PCT    <- pct_histogram("no_ml_advice_pct")
NO_GOV_ADVICE_PCT   <- pct_histogram("no_gov_advice_pct")
NO_BUDGET_PCT       <- pct_histogram("no_budget_pct")
NO_BANK_SAVE_PCT    <- pct_histogram("no_bank_save_pct")
NO_MFI_SAVE_PCT     <- pct_histogram("no_mfi_save_pct")
NO_SACCO_SAVE_PCT   <- pct_histogram("no_sacco_save_pct")
NO_MM_SAVE_PCT      <- pct_histogram("no_mm_save_pct")
NO_BANK_BORROW_PCT  <- pct_histogram("no_bank_borrow_pct")
NO_MFI_BORROW_PCT   <- pct_histogram("no_mfi_borrow_pct")
NO_SACCO_BORROW_PCT <- pct_histogram("no_sacco_borrow_pct")
NO_GOV_BORROW_PCT   <- pct_histogram("no_gov_borrow_pct")
NO_MM_BORROW_PCT    <- pct_histogram("no_mm_borrow_pct")

# Lay the fifteen histograms out on a 3-column by 5-row grid.
ggarrange(
  NO_SACCO_ADVICE_PCT, NO_FIN_ADVICE_PCT, NO_SG_ADVICE_PCT,
  NO_ML_ADVICE_PCT, NO_GOV_ADVICE_PCT, NO_BUDGET_PCT,
  NO_BANK_SAVE_PCT, NO_MFI_SAVE_PCT, NO_SACCO_SAVE_PCT,
  NO_MM_SAVE_PCT, NO_BANK_BORROW_PCT, NO_MFI_BORROW_PCT,
  NO_SACCO_BORROW_PCT, NO_GOV_BORROW_PCT, NO_MM_BORROW_PCT,
  ncol = 3, nrow = 5
)
4.2 Drawing Statistical Map
tmap_mode("plot")tmap mode set to plotting
# District outlines from dist_tz, with tz_sf filled by no_bank_pct on top.
tm_shape(dist_tz) +
  tm_polygons() +
  tm_shape(tz_sf) +
  tm_fill(col = "no_bank_pct") +
  tm_layout(
    main.title = "Percentage of unbanked in Tanzania",
    main.title.position = "center",
    main.title.fontface = "bold",
    main.title.size = 1
  )
From the tmap above, we can see that most of the area tends to be a darker color, which means that the majority of people there have no access to banks.
tmap_mode("plot")tmap mode set to plotting
# District outlines from dist_tz, with tz_sf filled by no_insurance_pct on top.
tm_shape(dist_tz) +
  tm_polygons() +
  tm_shape(tz_sf) +
  tm_fill(col = "no_insurance_pct") +
  tm_layout(
    main.title = "No insurance population in Tanzania",
    main.title.position = "center",
    main.title.fontface = "bold",
    main.title.size = 1
  )
# District outlines from dist_tz, with tz_sf filled by no_mm_access_pct on top.
tm_shape(dist_tz) +
  tm_polygons() +
  tm_shape(tz_sf) +
  tm_fill(col = "no_mm_access_pct") +
  tm_layout(
    main.title = "No mobile access in Tanzania",
    main.title.position = "center",
    main.title.fontface = "bold",
    main.title.size = 1
  )
From the map above, we can see that most of the districts in Tanzania have mobile access.
# District outlines from dist_tz, with tz_sf filled by no_int_access_pct on top.
tm_shape(dist_tz) +
  tm_polygons() +
  tm_shape(tz_sf) +
  tm_fill(col = "no_int_access_pct") +
  tm_layout(
    main.title = "No internet access in Tanzania",
    main.title.position = "center",
    main.title.fontface = "bold",
    main.title.size = 1
  )
Even though mobile phones are very accessible, there are still a lot of areas without internet access, which can be quite concerning.
# District outlines from dist_tz, with tz_sf filled by no_budget_pct on top.
tm_shape(dist_tz) +
  tm_polygons() +
  tm_shape(tz_sf) +
  tm_fill(col = "no_budget_pct") +
  tm_layout(
    main.title = "Awareness to do budgeting in Tanzania",
    main.title.position = "center",
    main.title.fontface = "bold",
    main.title.size = 1
  )
From the map, we can see that most people do budgeting in their daily lives, which is a good sign for their financial awareness.
# District outlines from dist_tz, with tz_sf filled by no_fin_edu_pct on top.
tm_shape(dist_tz) +
  tm_polygons() +
  tm_shape(tz_sf) +
  tm_fill(col = "no_fin_edu_pct") +
  tm_layout(
    main.title = "Financial Education in Tanzania",
    main.title.position = "center",
    main.title.fontface = "bold",
    main.title.size = 1
  )
The map above shows a rather concerning result: people in most areas of Tanzania have not received proper financial education.
5 Geographically Weighted Regression
5.1 Simple Linear Regression
5.1.1 Percentage of Unbanked and Cluster Type
The simple linear regression model below examines the linear relationship between the dependent variable (the percentage of unbanked population) and the independent variable (the percentage of rural population).
# Regress the unbanked percentage on the rural percentage, then print the fit.
tz.slr <- lm(no_bank_pct ~ rural_pct, data = tz_sf)
summary(tz.slr)
Call:
lm(formula = no_bank_pct ~ rural_pct, data = tz_sf)
Residuals:
Min 1Q Median 3Q Max
-27.5102 -4.9487 0.8344 7.0104 16.9187
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 60.8901 2.5572 23.811 < 2e-16 ***
rural_pct 0.2662 0.0315 8.451 4.28e-14 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 9.153 on 134 degrees of freedom
Multiple R-squared: 0.3477, Adjusted R-squared: 0.3428
F-statistic: 71.41 on 1 and 134 DF, p-value: 4.276e-14
The output can be explained by the following formula
$$\hat{y} = 60.8901 + 0.2662\,x_1$$
The R-squared of 0.3477 reveals that the simple linear regression model is able to explain about 35% of the variation in the percentage of unbanked population.
Since the p-values are much smaller than 0.0001, we reject the null hypotheses that $\beta_0 = 0$ and $\beta_1 = 0$. The results show that $\beta_0$ and $\beta_1$ are good parameter estimates.
Let us visualize on a scatterplot
# Scatterplot of unbanked percentage against rural percentage with a fitted
# linear trend line.
ggplot(tz_sf, aes(x = rural_pct, y = no_bank_pct)) +
  geom_point() +
  geom_smooth(method = lm)
`geom_smooth()` using formula = 'y ~ x'

The spread of the data points shows a moderate, positive correlation between the percentage of rural population and the percentage of people with no access to banks.
5.1.2 Percentage of Insurance
Is access to insurance services limited for people without financial education?
# Regress the no-insurance percentage on the no-financial-education
# percentage, then print the fit.
tz.slr1 <- lm(no_insurance_pct ~ no_fin_edu_pct, data = tz_sf)
summary(tz.slr1)
Call:
lm(formula = no_insurance_pct ~ no_fin_edu_pct, data = tz_sf)
Residuals:
Min 1Q Median 3Q Max
-15.2421 -3.8667 0.4675 3.8412 11.8062
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 62.44351 4.67409 13.360 < 2e-16 ***
no_fin_edu_pct 0.32188 0.05379 5.984 1.88e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.511 on 134 degrees of freedom
Multiple R-squared: 0.2109, Adjusted R-squared: 0.205
F-statistic: 35.81 on 1 and 134 DF, p-value: 1.877e-08
$$\hat{y} = 62.44351 + 0.32188\,x_1$$
There is a positive correlation between the percentage of people without insurance and the percentage of people without financial education.
# Scatterplot of no-insurance percentage against no-financial-education
# percentage with a fitted linear trend line.
ggplot(tz_sf, aes(x = no_fin_edu_pct, y = no_insurance_pct)) +
  geom_point() +
  geom_smooth(method = lm)
`geom_smooth()` using formula = 'y ~ x'

The data points are quite scattered, meaning the correlation is not very strong
5.2 Multiple Linear Regression
5.2.1 Correlation Matrix
Let us visualize the correlations between the variables using correlation matrix plots.
# Column groups to correlate. Every group starts with the same three columns
# (rural_pct, no_edu_pct, no_fin_edu_pct).
corr_columns <- c(
  "rural_pct", "no_edu_pct", "no_fin_edu_pct",
  "no_mm_access_pct", "no_int_access_pct", "no_laptop_access_pct",
  "no_bank_pct", "no_insurance_pct", "no_mm_pct"
)
corr_columns2 <- c(
  "rural_pct", "no_edu_pct", "no_fin_edu_pct",
  "no_pb_doc_pct", "no_bank_doc_pct", "no_insurance_doc_pct",
  "no_biz_register_pct"
)
corr_columns3 <- c(
  "rural_pct", "no_edu_pct", "no_fin_edu_pct",
  "no_bank_advice_pct", "no_mfi_advice_pct",
  "no_sacco_advice_pct", "no_fin_advice_pct", "no_sg_advice_pct",
  "no_ml_advice_pct", "no_gov_advice_pct"
)
corr_columns4 <- c(
  "rural_pct", "no_edu_pct", "no_fin_edu_pct",
  "no_budget_pct", "no_bank_save_pct", "no_mfi_save_pct",
  "no_sacco_save_pct", "no_mm_save_pct"
)
corr_columns5 <- c(
  "rural_pct", "no_edu_pct", "no_fin_edu_pct",
  "no_budget_pct", "no_bank_borrow_pct",
  "no_mfi_borrow_pct", "no_sacco_borrow_pct", "no_gov_borrow_pct",
  "no_mm_borrow_pct"
)

# Draw the upper-triangle correlation plot (coefficients shown as numbers) for
# the given tz_gwr columns.
draw_corr <- function(cols) {
  corrplot(
    cor(tz_gwr[, cols]),
    diag = FALSE, order = "AOE",
    tl.pos = "td", tl.cex = 0.8, method = "number", type = "upper"
  )
}

draw_corr(corr_columns)
draw_corr(corr_columns2)
draw_corr(corr_columns3)
draw_corr(corr_columns4)
draw_corr(corr_columns5)
5.2.2 Build Multiple Linear Regression Model
# Multiple linear regression of the unbanked percentage on ten district-level
# predictors, followed by the model summary.
tz.mlr <- lm(
  no_bank_pct ~
    rural_pct + female_pct +
    no_edu_pct + no_fin_edu_pct +
    no_mm_access_pct + no_int_access_pct + no_laptop_access_pct +
    no_insurance_pct + no_bank_advice_pct + no_fin_advice_pct,
  data = tz_sf
)
summary(tz.mlr)
Call:
lm(formula = no_bank_pct ~ rural_pct + female_pct + no_edu_pct +
no_fin_edu_pct + no_mm_access_pct + no_int_access_pct + no_laptop_access_pct +
no_insurance_pct + no_bank_advice_pct + no_fin_advice_pct,
data = tz_sf)
Residuals:
Min 1Q Median 3Q Max
-25.541 -4.173 1.540 5.194 17.549
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -51.94745 208.16944 -0.250 0.803350
rural_pct 0.13218 0.03848 3.435 0.000804 ***
female_pct 0.10482 0.08723 1.202 0.231761
no_edu_pct 0.11485 0.06600 1.740 0.084291 .
no_fin_edu_pct 0.20114 0.10087 1.994 0.048313 *
no_mm_access_pct -0.10305 0.06519 -1.581 0.116437
no_int_access_pct 0.24890 0.05844 4.259 3.99e-05 ***
no_laptop_access_pct -0.05461 0.16904 -0.323 0.747191
no_insurance_pct 0.24282 0.12946 1.876 0.063034 .
no_bank_advice_pct 1.71260 1.19961 1.428 0.155893
no_fin_advice_pct -1.06708 2.61918 -0.407 0.684403
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.719 on 125 degrees of freedom
Multiple R-squared: 0.5673, Adjusted R-squared: 0.5326
F-statistic: 16.39 on 10 and 125 DF, p-value: < 2.2e-16
From the result above, we can see that three parameters, namely rural percentage, lack of financial education and lack of internet access, are statistically significant at the 5% level, meaning that these three parameters affect the level of access to financial services in Tanzania.
5.2.3 Publication Quality Table: olsrr method
# tz.mlr1 uses exactly the same formula and data as tz.mlr fitted in section
# 5.2.2, so reuse that fitted model instead of fitting it a second time. This
# keeps the two from drifting apart if the formula is edited later.
tz.mlr1 <- tz.mlr
ols_regress(tz.mlr1) Model Summary
---------------------------------------------------------------
R 0.753 RMSE 7.400
R-Squared 0.567 MSE 59.577
Adj. R-Squared 0.533 Coef. Var 9.476
Pred R-Squared 0.496 AIC 954.349
MAE 5.773 SBC 989.301
---------------------------------------------------------------
RMSE: Root Mean Square Error
MSE: Mean Square Error
MAE: Mean Absolute Error
AIC: Akaike Information Criteria
SBC: Schwarz Bayesian Criteria
ANOVA
----------------------------------------------------------------------
Sum of
Squares DF Mean Square F Sig.
----------------------------------------------------------------------
Regression 9762.109 10 976.211 16.386 0.0000
Residual 7447.091 125 59.577
Total 17209.200 135
----------------------------------------------------------------------
Parameter Estimates
-----------------------------------------------------------------------------------------------------
model Beta Std. Error Std. Beta t Sig lower upper
-----------------------------------------------------------------------------------------------------
(Intercept) -51.947 208.169 -0.250 0.803 -463.941 360.046
rural_pct 0.132 0.038 0.293 3.435 0.001 0.056 0.208
female_pct 0.105 0.087 0.071 1.202 0.232 -0.068 0.277
no_edu_pct 0.115 0.066 0.122 1.740 0.084 -0.016 0.245
no_fin_edu_pct 0.201 0.101 0.157 1.994 0.048 0.002 0.401
no_mm_access_pct -0.103 0.065 -0.108 -1.581 0.116 -0.232 0.026
no_int_access_pct 0.249 0.058 0.354 4.259 0.000 0.133 0.365
no_laptop_access_pct -0.055 0.169 -0.030 -0.323 0.747 -0.389 0.280
no_insurance_pct 0.243 0.129 0.133 1.876 0.063 -0.013 0.499
no_bank_advice_pct 1.713 1.200 0.117 1.428 0.156 -0.662 4.087
no_fin_advice_pct -1.067 2.619 -0.033 -0.407 0.684 -6.251 4.117
-----------------------------------------------------------------------------------------------------
5.2.4 Publication Quality Table: gtsummary method
tbl_regression(tz.mlr1, intercept = TRUE)Characteristic |
Beta |
95% CI 1 |
p-value |
|---|---|---|---|
| (Intercept) | -52 | -464, 360 | 0.8 |
| rural_pct | 0.13 | 0.06, 0.21 | <0.001 |
| female_pct | 0.10 | -0.07, 0.28 | 0.2 |
| no_edu_pct | 0.11 | -0.02, 0.25 | 0.084 |
| no_fin_edu_pct | 0.20 | 0.00, 0.40 | 0.048 |
| no_mm_access_pct | -0.10 | -0.23, 0.03 | 0.12 |
| no_int_access_pct | 0.25 | 0.13, 0.36 | <0.001 |
| no_laptop_access_pct | -0.05 | -0.39, 0.28 | 0.7 |
| no_insurance_pct | 0.24 | -0.01, 0.50 | 0.063 |
| no_bank_advice_pct | 1.7 | -0.66, 4.1 | 0.2 |
| no_fin_advice_pct | -1.1 | -6.3, 4.1 | 0.7 |
| 1
CI = Confidence Interval |
|||
This table provides a clearer view to assess the Beta, confidence interval and p-value.
5.3 GW Model
5.3.1 Fixed Bandwidth GWR Model
Convert sf dataframe to sp
# Convert the sf layer to an sp object for the GWmodel calls.
tz.sp <- as_Spatial(tz_sf)

# Select a fixed (non-adaptive) Gaussian-kernel bandwidth by cross-validation.
# NOTE(review): longlat = FALSE with a layer in EPSG:4210 means the bandwidth
# appears to be expressed in degrees -- TODO confirm this is intended.
bw.fixed <- bw.gwr(
  formula = no_bank_pct ~
    rural_pct + female_pct +
    no_edu_pct + no_fin_edu_pct +
    no_mm_access_pct + no_int_access_pct +
    no_laptop_access_pct + no_insurance_pct +
    no_bank_advice_pct + no_fin_advice_pct,
  data = tz.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = FALSE,
  longlat = FALSE
)
Fixed bandwidth: 8.058107 CV score: 8244.491
Fixed bandwidth: 4.98118 CV score: 7798.919
Fixed bandwidth: 3.079535 CV score: 7501.337
Fixed bandwidth: 1.904253 CV score: 7951.643
Fixed bandwidth: 3.805899 CV score: 7567.353
Fixed bandwidth: 2.630617 CV score: 7518.972
Fixed bandwidth: 3.356981 CV score: 7513.505
Fixed bandwidth: 2.908063 CV score: 7502.744
Fixed bandwidth: 3.18551 CV score: 7503.873
Fixed bandwidth: 3.014038 CV score: 7501.08
Fixed bandwidth: 2.973559 CV score: 7501.415
Fixed bandwidth: 3.039056 CV score: 7501.061
Fixed bandwidth: 3.054517 CV score: 7501.121
Fixed bandwidth: 3.0295 CV score: 7501.051
Fixed bandwidth: 3.023594 CV score: 7501.055
Fixed bandwidth: 3.03315 CV score: 7501.052
Fixed bandwidth: 3.027244 CV score: 7501.052
Fixed bandwidth: 3.030894 CV score: 7501.051
Fixed bandwidth: 3.028638 CV score: 7501.051
Fixed bandwidth: 3.030032 CV score: 7501.051
# Calibrate the basic GWR model with the fixed bandwidth stored in bw.fixed and
# a Gaussian kernel, then print the result.
gwr.fixed <- gwr.basic(
  formula = no_bank_pct ~
    rural_pct + female_pct +
    no_edu_pct + no_fin_edu_pct +
    no_mm_access_pct + no_int_access_pct +
    no_laptop_access_pct + no_insurance_pct +
    no_bank_advice_pct + no_fin_advice_pct,
  data = tz.sp,
  bw = bw.fixed,
  kernel = "gaussian",
  longlat = FALSE
)
gwr.fixed
***********************************************************************
* Package GWmodel *
***********************************************************************
Program starts at: 2024-11-10 15:54:33.898423
Call:
gwr.basic(formula = no_bank_pct ~ rural_pct + female_pct + no_edu_pct +
no_fin_edu_pct + no_mm_access_pct + no_int_access_pct + no_laptop_access_pct +
no_insurance_pct + no_bank_advice_pct + no_fin_advice_pct,
data = tz.sp, bw = bw.fixed, kernel = "gaussian", longlat = FALSE)
Dependent (y) variable: no_bank_pct
Independent variables: rural_pct female_pct no_edu_pct no_fin_edu_pct no_mm_access_pct no_int_access_pct no_laptop_access_pct no_insurance_pct no_bank_advice_pct no_fin_advice_pct
Number of data points: 136
***********************************************************************
* Results of Global Regression *
***********************************************************************
Call:
lm(formula = formula, data = data)
Residuals:
Min 1Q Median 3Q Max
-25.541 -4.173 1.540 5.194 17.549
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -51.94745 208.16944 -0.250 0.803350
rural_pct 0.13218 0.03848 3.435 0.000804 ***
female_pct 0.10482 0.08723 1.202 0.231761
no_edu_pct 0.11485 0.06600 1.740 0.084291 .
no_fin_edu_pct 0.20114 0.10087 1.994 0.048313 *
no_mm_access_pct -0.10305 0.06519 -1.581 0.116437
no_int_access_pct 0.24890 0.05844 4.259 3.99e-05 ***
no_laptop_access_pct -0.05461 0.16904 -0.323 0.747191
no_insurance_pct 0.24282 0.12946 1.876 0.063034 .
no_bank_advice_pct 1.71260 1.19961 1.428 0.155893
no_fin_advice_pct -1.06708 2.61918 -0.407 0.684403
---Significance stars
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.719 on 125 degrees of freedom
Multiple R-squared: 0.5673
Adjusted R-squared: 0.5326
F-statistic: 16.39 on 10 and 125 DF, p-value: < 2.2e-16
***Extra Diagnostic information
Residual sum of squares: 7447.091
Sigma(hat): 7.454885
AIC: 954.3489
AICc: 956.8855
BIC: 912.2526
***********************************************************************
* Results of Geographically Weighted Regression *
***********************************************************************
*********************Model calibration information*********************
Kernel function: gaussian
Fixed bandwidth: 3.030032
Regression points: the same locations as observations are used.
Distance metric: Euclidean distance metric is used.
****************Summary of GWR coefficient estimates:******************
Min. 1st Qu. Median 3rd Qu.
Intercept -385.315128 -295.644154 -159.492443 -56.154825
rural_pct 0.049763 0.099210 0.126868 0.143830
female_pct 0.027938 0.045475 0.060680 0.098681
no_edu_pct 0.027371 0.059488 0.086054 0.128906
no_fin_edu_pct 0.043082 0.126680 0.201808 0.238602
no_mm_access_pct -0.417660 -0.128560 0.035467 0.066068
no_int_access_pct 0.142357 0.177954 0.191224 0.196224
no_laptop_access_pct -0.270523 -0.154195 -0.015231 0.253945
no_insurance_pct 0.149959 0.229173 0.310503 0.345027
no_bank_advice_pct 0.368094 1.131234 1.558512 1.751883
no_fin_advice_pct -5.532509 -0.462136 0.651674 1.484986
Max.
Intercept 282.7846
rural_pct 0.1861
female_pct 0.1879
no_edu_pct 0.2050
no_fin_edu_pct 0.3343
no_mm_access_pct 0.1623
no_int_access_pct 0.2124
no_laptop_access_pct 0.7263
no_insurance_pct 0.4596
no_bank_advice_pct 2.7385
no_fin_advice_pct 2.0872
************************Diagnostic information*************************
Number of data points: 136
Effective number of parameters (2trace(S) - trace(S'S)): 34.56652
Effective degrees of freedom (n-2trace(S) + trace(S'S)): 101.4335
AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 943.6715
AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 900.105
BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 868.6595
Residual sum of squares: 4898.628
R-square value: 0.7153483
Adjusted R-square value: 0.6173788
***********************************************************************
Program stops at: 2024-11-10 15:54:33.911333
5.3.2 Adaptive Bandwidth GWR Model
# Search for the adaptive bandwidth by cross-validation (approach = "CV")
# with a Gaussian kernel.
# NOTE(review): longlat = FALSE requests Euclidean distances on the raw
# coordinates. The fixed bandwidth reported for the previous model (about
# 3.03) suggests the coordinates may be in degrees -- confirm the CRS of
# tz.sp before interpreting the bandwidths.
bw.adaptive <- bw.gwr(
  formula = no_bank_pct ~ rural_pct + female_pct + no_edu_pct +
    no_fin_edu_pct + no_mm_access_pct + no_int_access_pct +
    no_laptop_access_pct + no_insurance_pct + no_bank_advice_pct +
    no_fin_advice_pct,
  data = tz.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = TRUE,
  longlat = FALSE
)
Adaptive bandwidth: 91 CV score: 8143.955
Adaptive bandwidth: 64 CV score: 7976.207
Adaptive bandwidth: 46 CV score: 7933.874
Adaptive bandwidth: 36 CV score: 7925.873
Adaptive bandwidth: 29 CV score: 7952.761
Adaptive bandwidth: 39 CV score: 7916.93
Adaptive bandwidth: 42 CV score: 7932.145
Adaptive bandwidth: 38 CV score: 7904.127
Adaptive bandwidth: 36 CV score: 7925.873
Adaptive bandwidth: 38 CV score: 7904.127
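The result shows that 38 is the recommended number of data points (nearest neighbours) to use as the adaptive bandwidth, since it gives the lowest CV score (7904.127).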
# Calibrate the GWR model using bw.adaptive as the bandwidth
# (adaptive = TRUE, Gaussian kernel), then print the fitted model.
gwr.adaptive <- gwr.basic(
  formula = no_bank_pct ~ rural_pct + female_pct + no_edu_pct +
    no_fin_edu_pct + no_mm_access_pct + no_int_access_pct +
    no_laptop_access_pct + no_insurance_pct + no_bank_advice_pct +
    no_fin_advice_pct,
  data = tz.sp,
  bw = bw.adaptive,
  kernel = "gaussian",
  adaptive = TRUE,
  longlat = FALSE
)
gwr.adaptive
***********************************************************************
* Package GWmodel *
***********************************************************************
Program starts at: 2024-11-10 15:54:33.999346
Call:
gwr.basic(formula = no_bank_pct ~ rural_pct + female_pct + no_edu_pct +
no_fin_edu_pct + no_mm_access_pct + no_int_access_pct + no_laptop_access_pct +
no_insurance_pct + no_bank_advice_pct + no_fin_advice_pct,
data = tz.sp, bw = bw.adaptive, kernel = "gaussian", adaptive = TRUE,
longlat = FALSE)
Dependent (y) variable: no_bank_pct
Independent variables: rural_pct female_pct no_edu_pct no_fin_edu_pct no_mm_access_pct no_int_access_pct no_laptop_access_pct no_insurance_pct no_bank_advice_pct no_fin_advice_pct
Number of data points: 136
***********************************************************************
* Results of Global Regression *
***********************************************************************
Call:
lm(formula = formula, data = data)
Residuals:
Min 1Q Median 3Q Max
-25.541 -4.173 1.540 5.194 17.549
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -51.94745 208.16944 -0.250 0.803350
rural_pct 0.13218 0.03848 3.435 0.000804 ***
female_pct 0.10482 0.08723 1.202 0.231761
no_edu_pct 0.11485 0.06600 1.740 0.084291 .
no_fin_edu_pct 0.20114 0.10087 1.994 0.048313 *
no_mm_access_pct -0.10305 0.06519 -1.581 0.116437
no_int_access_pct 0.24890 0.05844 4.259 3.99e-05 ***
no_laptop_access_pct -0.05461 0.16904 -0.323 0.747191
no_insurance_pct 0.24282 0.12946 1.876 0.063034 .
no_bank_advice_pct 1.71260 1.19961 1.428 0.155893
no_fin_advice_pct -1.06708 2.61918 -0.407 0.684403
---Significance stars
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.719 on 125 degrees of freedom
Multiple R-squared: 0.5673
Adjusted R-squared: 0.5326
F-statistic: 16.39 on 10 and 125 DF, p-value: < 2.2e-16
***Extra Diagnostic information
Residual sum of squares: 7447.091
Sigma(hat): 7.454885
AIC: 954.3489
AICc: 956.8855
BIC: 912.2526
***********************************************************************
* Results of Geographically Weighted Regression *
***********************************************************************
*********************Model calibration information*********************
Kernel function: gaussian
Adaptive bandwidth: 38 (number of nearest neighbours)
Regression points: the same locations as observations are used.
Distance metric: Euclidean distance metric is used.
****************Summary of GWR coefficient estimates:******************
Min. 1st Qu. Median 3rd Qu. Max.
Intercept -440.882937 -285.038239 -155.304839 -61.370596 57.7791
rural_pct 0.080711 0.105720 0.124825 0.142687 0.1594
female_pct 0.028002 0.044485 0.069584 0.101263 0.1345
no_edu_pct 0.017595 0.063886 0.087882 0.133182 0.1880
no_fin_edu_pct 0.098236 0.154510 0.196705 0.246374 0.3369
no_mm_access_pct -0.303082 -0.146644 0.025093 0.058471 0.1029
no_int_access_pct 0.162930 0.188703 0.197107 0.207036 0.2202
no_laptop_access_pct -0.232922 -0.162167 -0.018073 0.223229 0.3451
no_insurance_pct 0.166970 0.223337 0.300942 0.336816 0.3929
no_bank_advice_pct 0.264609 1.058209 1.580178 1.788177 2.1150
no_fin_advice_pct -2.519941 -0.635440 0.683280 1.503728 2.6065
************************Diagnostic information*************************
Number of data points: 136
Effective number of parameters (2trace(S) - trace(S'S)): 31.16932
Effective degrees of freedom (n-2trace(S) + trace(S'S)): 104.8307
AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 952.7279
AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 914.6376
BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 873.0636
Residual sum of squares: 5555.823
R-square value: 0.6771597
Adjusted R-square value: 0.5802451
***********************************************************************
Program stops at: 2024-11-10 15:54:34.012881
5.4 Converting SDF to sf dataframe
# Convert the GWR result (gwr.adaptive$SDF) to an sf object and transform it
# to EPSG:4210.
tz_adaptive <- st_transform(st_as_sf(gwr.adaptive$SDF), crs = 4210)

# Data-frame copy of the same SDF.
gwr.adaptive.output <- as.data.frame(gwr.adaptive$SDF)

# NOTE(review): both inputs below are derived from gwr.adaptive$SDF, so this
# cbind() appears to append a second copy of the same columns -- confirm
# whether tz_adaptive alone would be sufficient for the later steps.
tz_sf.adaptive <- cbind(tz_adaptive, as.matrix(gwr.adaptive.output))
5.5 Visualising Local R2
# Switch tmap to interactive ("view") mode.
tmap_mode("view")
tmap mode set to interactive viewing
# Draw dist_tz as faint polygons (alpha = 0.1) and overlay the GWR output as
# dots coloured by the Local_R2 column.
tm_shape(dist_tz) +
  tm_polygons(alpha = 0.1) +
  tm_shape(tz_sf.adaptive) +
  tm_dots(col = "Local_R2", border.col = "gray60", border.lwd = 1)